Introduction

Indices of various copeopod groups have been developed: https://noaa-edab.github.io/zooplanktonindex/CopeModResults.html

Now the question is, are the zooplankton available to herring larvae? We will explore data available on herring larvae in the EcoMon (and previous zooplankton) data.

Methods

Herring larvae data were added to the input dataset in the updated script https://github.com/NOAA-EDAB/zooplanktonindex/blob/main/data/VASTzoopindex_processinputs.R and all stations were re-mapped to OISST data to fill missing temperature values if necessary.

Where are herring larvae in each of our seasons?

herringfood_stn <- readRDS(here::here("data/herringfood_stn_all_OISST.rds"))

# make SST column that uses surftemp unless missing or 0

herringfood_stn <- herringfood_stn %>%
  dplyr::mutate(sstfill = ifelse((is.na(sfc_temp)|sfc_temp==0), oisst, sfc_temp))

herringlarvae_stn_fall <- herringfood_stn %>%
  #ungroup() %>%
  filter(season_ng == "FALL", 
         year > 1981) %>%
  mutate(AreaSwept_km2 = 1, #Elizabeth's code
         #declon = -declon already done before neamap merge
         Vessel = 1,
         Dayofyear = lubridate::yday(date) #as.numeric(as.factor(vessel))-1
  ) %>% 
  dplyr::select(Catch_g = cluhar_100m3, #use megabenwt for individuals input in example
                Year = year,
                Month = month,
                Dayofyear,
                Vessel,
                AreaSwept_km2,
                Lat = lat,
                Lon = lon,
                #btm_temp, #this leaves out many stations
                #sfc_temp, #this leaves out many stations
                #oisst,
                sstfill
  ) %>%
  na.omit() %>%
  as.data.frame()

herringlarvae_stn_spring <- herringfood_stn %>%
  #ungroup() %>%
  filter(season_ng == "SPRING", 
         year > 1981) %>%
  mutate(AreaSwept_km2 = 1, #Elizabeth's code
         #declon = -declon already done before neamap merge
         Vessel = 1,
         Dayofyear = lubridate::yday(date) #as.numeric(as.factor(vessel))-1
  ) %>% 
  dplyr::select(Catch_g = cluhar_100m3, #use megabenwt for individuals input in example
                Year = year,
                Month = month,
                Dayofyear,
                Vessel,
                AreaSwept_km2,
                Lat = lat,
                Lon = lon,
                #btm_temp, #this leaves out many stations
                #sfc_temp, #this leaves out many stations
                #oisst,
                sstfill
  ) %>%
  na.omit() %>%
  as.data.frame()


nonzerofall <- herringlarvae_stn_fall |>
  dplyr::filter(Catch_g > 0) #,
                #Year > 1981)

nonzerospring <- herringlarvae_stn_spring |>
  dplyr::filter(Catch_g > 0) #,
               # Year > 1981)

Fall <- ggplot(data = ecodata::coast) +
  geom_sf() + 
  geom_point(data = FishStatsUtils::northwest_atlantic_grid, aes(x = Lon, y = Lat),  colour = "coral4", size=0.05, alpha=0.1) +
  geom_point(data = nonzerofall, aes(x = Lon, y = Lat), colour = "blue", size=0.5, alpha=1) +
  coord_sf(xlim =c(-78.5, -65.5), ylim = c(33, 45)) + #zoomed to Hatteras and N
  xlab("") +
  ylab("") +
  ggtitle("Fall herring larvae 1982-2022")+
  theme(plot.margin = margin(0, 0, 0, 0, "cm"))

Spring <- ggplot(data = ecodata::coast) +
  geom_sf() + 
  geom_point(data = FishStatsUtils::northwest_atlantic_grid, aes(x = Lon, y = Lat),  colour = "coral4", size=0.05, alpha=0.1) +
  geom_point(data = nonzerospring, aes(x = Lon, y = Lat), colour = "blue", size=0.5, alpha=1) +
  coord_sf(xlim =c(-78.5, -65.5), ylim = c(33, 45)) + #zoomed to Hatteras and N
  xlab("") +
  ylab("") +
  ggtitle("Spring herring larvae 1982-2022")+
  theme(plot.margin = margin(0, 0, 0, 0, "cm"))
 
Spring + Fall

Day of year and months herring larvae found (present, not abundance)

herringlarvae_stn_all <- dplyr::bind_rows(herringlarvae_stn_spring, herringlarvae_stn_fall)

hist(herringlarvae_stn_all$Dayofyear, xlim=c(0,365), breaks=366)

hist(herringlarvae_stn_all$Month, xlim=c(0,12), breaks=13)

Amount of herring larvae found (sum station volume over all years, not an abundance)

sumlarvae <- herringlarvae_stn_all |>
  dplyr::group_by(Month) |>
  dplyr::summarise(totlarvae = sum(Catch_g, na.rm = TRUE))

ggplot2::ggplot(sumlarvae, ggplot2::aes(x=Month, y=totlarvae)) +
  ggplot2::geom_bar(stat = "identity")

So fall larvae, could be used to weight fall small copepods.

What years? Also just summing stations, not an abundance estimate

sumlarvaeyr <- herringlarvae_stn_all |>
  dplyr::group_by(Year) |>
  dplyr::summarise(totlarvae = sum(Catch_g, na.rm = TRUE))

ggplot2::ggplot(sumlarvaeyr, ggplot2::aes(x=Year, y=totlarvae)) +
  ggplot2::geom_bar(stat = "identity")

Initial herring larvae model results: fall only

# from each output folder in pyindex, 
outdir <- here::here("pyindex")
moddirs <- list.dirs(outdir) 
moddirs <- moddirs[-1]
# keep folder name
modnames <- list.dirs(outdir, full.names = FALSE)


# function to apply extracting info
getmodinfo <- function(d.name){
  # read settings
  modpath <- stringr::str_split(d.name, "/", simplify = TRUE)
  modname <- modpath[length(modpath)]
  
  settings <- read.table(file.path(d.name, "settings.txt"), comment.char = "",
    fill = TRUE, header = FALSE)
  
  n_x <- as.numeric(as.character(settings[(which(settings[,1]=="$n_x")+1),2]))
  grid_size_km <- as.numeric(as.character(settings[(which(settings[,1]=="$grid_size_km")+1),2]))
  max_cells <- as.numeric(as.character(settings[(which(settings[,1]=="$max_cells")+1),2]))
  use_anisotropy <- as.character(settings[(which(settings[,1]=="$use_anisotropy")+1),2])
  fine_scale <- as.character(settings[(which(settings[,1]=="$fine_scale")+1),2])
  bias.correct <- as.character(settings[(which(settings[,1]=="$bias.correct")+1),2])
  
  #FieldConfig
  if(settings[(which(settings[,1]=="$FieldConfig")+1),1]=="Component_1"){
    omega1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+2),2])
    omega2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),1])
    epsilon1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),2])
    epsilon2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+5),1])
    beta1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+6),2])
    beta2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+7),1])
  }
  
  if(settings[(which(settings[,1]=="$FieldConfig")+1),1]=="Omega1"){
    omega1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),1])
    omega2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),1])
    epsilon1 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+3),2])
    epsilon2 <- as.character(settings[(which(settings[,1]=="$FieldConfig")+4),2])
    beta1 <- "IID"
    beta2 <- "IID"
  }
  
  
  #RhoConfig
  rho_beta1 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+3),1]))
  rho_beta2 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+3),2]))
  rho_epsilon1 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+4),1]))
  rho_epsilon2 <- as.numeric(as.character(settings[(which(settings[,1]=="$RhoConfig")+4),2]))
  
  # read parameter estimates, object is called parameter_Estimates
  if(file.exists(file.path(d.name, "parameter_estimates.RData"))) {
    load(file.path(d.name, "parameter_estimates.RData"))
    
    AIC <- parameter_estimates$AIC[1]  
    converged <- parameter_estimates$Convergence_check[1]
    fixedcoeff <- unname(parameter_estimates$number_of_coefficients[2])
    randomcoeff <- unname(parameter_estimates$number_of_coefficients[3])
    
  }else if(file.exists(file.path(d.name, "parameter_estimates.txt"))){
    
    reptext <- readLines(file.path(d.name, "parameter_estimates.txt"))
    
    AIC <- as.double(reptext[grep(reptext, pattern = "AIC")+1])
    converged <- reptext[grep(reptext, pattern = "Convergence_check")+1]
    fixedcoeff <- as.integer(stringr::str_split(reptext[grep(reptext, pattern = "number_of_coefficients")+2], 
                                     boundary("word"))[[1]][2])
    randomcoeff <- as.integer(stringr::str_split(reptext[grep(reptext, pattern = "number_of_coefficients")+2], 
                                     boundary("word"))[[1]][3])
    
  }else{
    
    AIC <- NA_real_
    converged <- NA_character_
    fixedcoeff <- NA_integer_
    randomcoeff <- NA_integer_
  }
  
  
  #index <- read.csv(file.path(d.name, "Index.csv"))
  
  
  # return model attributes as a dataframe
  out <- data.frame(modname = modname,
                    n_x = n_x,
                    grid_size_km = grid_size_km,
                    max_cells = max_cells,
                    use_anisotropy = use_anisotropy,
                    fine_scale =  fine_scale,
                    bias.correct = bias.correct,
                    omega1 = omega1,
                    omega2 = omega2,
                    epsilon1 = epsilon1,
                    epsilon2 = epsilon2,
                    beta1 = beta1,
                    beta2 = beta2,
                    rho_epsilon1 = rho_epsilon1,
                    rho_epsilon2 = rho_epsilon2,
                    rho_beta1 = rho_beta1,
                    rho_beta2 = rho_beta2,
                    AIC = AIC,
                    converged = converged,
                    fixedcoeff = fixedcoeff,
                    randomcoeff = randomcoeff#,
                    #index = index
  )
    
    return(out)

}


modcompare <- purrr::map_dfr(moddirs, getmodinfo)

modselect <- modcompare |>
  dplyr::mutate(season = dplyr::case_when(stringr::str_detect(modname, "_fall_") ~ "Fall",
                            stringr::str_detect(modname, "spring") ~ "Spring",
                            stringr::str_detect(modname, "_all_") ~ "Annual",
                            TRUE ~ as.character(NA))) |>
  dplyr::mutate(converged2 = dplyr::case_when(stringr::str_detect(converged, "no evidence") ~ "likely",
                                stringr::str_detect(converged, "is likely not") ~ "unlikely",
                                TRUE ~ as.character(NA))) |>
  dplyr::mutate(copegroup = stringr::str_extract(modname, "[^_]+")) |>
  #dplyr::mutate(modname = str_extract(modname, '(?<=allagg_).*')) |>
  dplyr::group_by(copegroup, season) |>
  dplyr::mutate(deltaAIC = AIC-min(AIC)) |>
  dplyr::select(copegroup, modname, season, deltaAIC, fixedcoeff,
         randomcoeff, use_anisotropy, 
         omega1, omega2, epsilon1, epsilon2, 
         beta1, beta2, AIC, converged2) |>
  dplyr::arrange(copegroup, season, AIC)

# DT::datatable(modselect, rownames = FALSE, 
#               options= list(pageLength = 25, scrollX = TRUE),
#               caption = "Comparison of delta AIC values using Restricted Maxiumum Likelihood (REML) for alternative fixed and random effects model structures. See text for model descriptions.")

# flextable::flextable(modselect) %>%
#                        #dplyr::select(-c(use_anisotropy, 
#          #omega1, omega2, epsilon1, epsilon2, 
#          #beta1, beta2))
#          #) %>%
#   flextable::set_header_labels(modname = "Model name",
#                                season = "Season",
#                                #deltaAIC = "dAIC",
#                                fixedcoeff = "N fixed",
#                                randomcoeff = "N random",
#                                converged2 = "Convergence") |>
#   #flextable::set_caption("Comparison of delta AIC (dAIC) values using Restricted Maxiumum Likelihood (REML) for alternative fixed and random effects model structures, with number of fixed (N fixed) and random (N random) coefficients. See text for model descriptions associated with each model name.") %>%
#   flextable::fontsize(size = 9, part = "all") %>%
#   flextable::colformat_double(digits = 2) |>
#   flextable::set_table_properties(layout = "autofit", width = 1)

Stations by season

Fall sampling for herring larvae was completed in most years aside from GLOBEC. In our definition of spring, herring larvae primarily ocurr in January and February.

for(d.name in moddirs[str_detect(moddirs, "herring")]){
  
  modpath <- unlist(str_split(d.name, pattern = "/"))
  modname <- modpath[length(modpath)]
  
  cat(modname, "\n")
  cat(paste0("![](",d.name, "/Data_by_year.png)"))
  cat("\n\n")
  
}

herringlarvae_fall_500_biascorrect

herringlarvae_spring_500_biascorrect

Indices by group, season, and region

stratlook <- data.frame(Stratum = c("Stratum_1",
                                    "Stratum_2",
                                    "Stratum_3",
                                    "Stratum_4",
                                    "Stratum_5",
                                    "Stratum_6",
                                    "Stratum_7"),
                        Region  = c("AllEPU",
                                    "her_sp",
                                    "her_fa",
                                    "MAB",
                                    "GB",
                                    "GOM",
                                    "SS"))

# function to apply extracting info
getmodindex <- function(d.name){
  # read settings
  modpath <- stringr::str_split(d.name, "/", simplify = TRUE)
  modname <- modpath[length(modpath)]
  
  if(file.exists(file.path(d.name,"Index.csv"))){
    index <- read.csv(file.path(d.name, "Index.csv"))
  }else{
    stopifnot()
  }
  # return model indices as a dataframe
  out <- data.frame(modname = modname,
                    index
  )
  
  return(out)
}

modcompareindex <- purrr::map_dfr(moddirs, purrr::possibly(getmodindex, otherwise = NULL))

splitoutput <- modcompareindex %>%
  dplyr::mutate(Season = modname |> map(str_split, pattern = "_") |> map_chr(c(1,2))) %>%
  dplyr::mutate(Data = modname |> map(str_split, pattern = "_") |> map_chr(c(1,1))) %>%
  dplyr::mutate(Estimate = ifelse(Estimate == 0, NA, Estimate)) |>
  dplyr::left_join(stratlook) #%>%
  #dplyr::filter(Region %in% c(GOM", "GB", "MAB","SS", "AllEPU")) use all regions

zoomax <- max(splitoutput$Estimate, na.rm=T)


zootsmean <- splitoutput %>%
  dplyr::group_by(modname, Region) %>%
  dplyr::mutate(fmean = mean(Estimate, na.rm=T)) 

Seasonal indices

plot_zooindices <- function(splitoutput, plotdata, plotregions, plottitle){
  
  filterEPUs <- plotregions #c("her_sp", "her_fa", "MAB", "GB", "GOM", "SS", "AllEPU")
  
  seasons <- splitoutput |> dplyr::filter(Data==plotdata) |> dplyr::select(Season) |> dplyr::distinct()
  
  ncols <- dim(seasons)[1]
  
  currentMonth <- lubridate::month(Sys.Date())
  currentYear <- lubridate::year(Sys.Date())
  if (currentMonth > 4) {
    endShade <- currentYear
  } else {
    endShade <- currentYear - 1
  }
  shadedRegion <- c(endShade-9,endShade)
  
  shade.alpha <- 0.3
  shade.fill <- "lightgrey"
  lwd <- 1
  pcex <- 2
  trend.alpha <- 0.5
  trend.size <- 2
  hline.size <- 1
  line.size <- 2
  hline.alpha <- 0.35
  hline.lty <- "dashed"
  label.size <- 5
  hjust.label <- 1.5
  letter_size <- 4
  errorbar.width <- 0.25
  x.shade.min <- shadedRegion[1]
  x.shade.max <- shadedRegion[2]
  
  setup <- list(
    shade.alpha = shade.alpha,
    shade.fill =shade.fill,
    lwd = lwd,
    pcex = pcex,
    trend.alpha = trend.alpha,
    trend.size = trend.size,
    line.size = line.size,
    hline.size = hline.size,
    hline.alpha = hline.alpha,
    hline.lty = hline.lty,
    errorbar.width = errorbar.width,
    label.size = label.size,
    hjust.label = hjust.label,
    letter_size = letter_size,
    x.shade.min = x.shade.min,
    x.shade.max = x.shade.max
  )
  
  
  fix<- splitoutput |>
    dplyr::filter(Data %in% plotdata, #c("calfin"),
                  Region %in% filterEPUs) |>
    dplyr::group_by(Region, Season) |>
    dplyr::summarise(max = max(Estimate, na.rm=T))
  
  p <- splitoutput |>
    dplyr::filter(Data %in% plotdata, #c("calfin"),
                  Region %in% filterEPUs) |>
    dplyr::group_by(Region, Season) |>
    dplyr::left_join(fix) |>
    dplyr::mutate(#Value = Value/resca,
      Mean = as.numeric(Estimate),
      SE = Std..Error.for.Estimate,
      Mean = Mean/max,
      SE = SE/max,
      Upper = Mean + SE,
      Lower = Mean - SE) |>
    ggplot2::ggplot(ggplot2::aes(x = Time, y = Mean, linetype = modname, group = modname))+
    ggplot2::annotate("rect", fill = setup$shade.fill, alpha = setup$shade.alpha,
                      xmin = setup$x.shade.min , xmax = setup$x.shade.max,
                      ymin = -Inf, ymax = Inf) +
    ggplot2::geom_ribbon(ggplot2::aes(ymin = Lower, ymax = Upper, fill = Season), alpha = 0.5)+
    ggplot2::geom_point()+
    ggplot2::geom_line()+
    ggplot2::ggtitle(plottitle)+
    ggplot2::ylab(expression("Relative abundance"))+
    ggplot2::xlab(ggplot2::element_blank())+
    ggplot2::facet_wrap(Region~Season, ncol = ncols, 
                        labeller = label_wrap_gen(multi_line=FALSE))+
    ecodata::geom_gls()+
    ecodata::theme_ts()+
    ecodata::theme_facet()+
    ecodata::theme_title() +
    ggplot2::theme(legend.position = "bottom")
  
  return(p)
}


plot_zooindices(splitoutput = splitoutput, 
             plotdata = "herringlarvae", 
             plotregions = c("her_sp", "her_fa", "MAB", "GB", "GOM", "SS", "AllEPU"), 
             plottitle = "Herring larvae") 

Relative density by area

plotdata <- c("herringlarvae")
plottitle  <-  "Herring larvae"

  fix<- splitoutput |>
    dplyr::filter(Data %in% plotdata #, 
                  #Region %in% filterEPUs
                  ) |>
    dplyr::group_by(Season) |> #Region,
    dplyr::summarise(max = max(Estimate, na.rm=T))
  
  p <- splitoutput |>
    dplyr::filter(Data %in% plotdata #, #c("calfin"),
                  #Region %in% filterEPUs
                  ) |>
    dplyr::group_by(Season) |> #Region, 
    dplyr::left_join(fix) |>
    dplyr::mutate(#Value = Value/resca,
      Mean = as.numeric(Estimate),
      SE = Std..Error.for.Estimate,
      Mean = Mean/max,
      SE = SE/max,
      Upper = Mean + SE,
      Lower = Mean - SE) |>
    ggplot2::ggplot(ggplot2::aes(x = Time, y = Mean, linetype = Region, group = Region))+
    #ggplot2::annotate("rect", fill = setup$shade.fill, alpha = setup$shade.alpha,
    #                  xmin = setup$x.shade.min , xmax = setup$x.shade.max,
    #                  ymin = -Inf, ymax = Inf) +
    ggplot2::geom_ribbon(ggplot2::aes(ymin = Lower, ymax = Upper, fill = Region), alpha = 0.5)+
    ggplot2::geom_point()+
    ggplot2::geom_line()+
    ggplot2::ggtitle(plottitle)+
    ggplot2::ylab(expression("Relative abundance"))+
    ggplot2::xlab(ggplot2::element_blank())+
    ggplot2::facet_wrap(~Season, #Region~ ncol = ncols, 
                        labeller = label_wrap_gen(multi_line=FALSE))+
    #ecodata::geom_gls()+
    ecodata::theme_ts()+
    ecodata::theme_facet()+
    ecodata::theme_title() +
    ggplot2::theme(legend.position = "bottom")
  
 p

Density estimates

for(d.name in moddirs[str_detect(moddirs, "herring")]){
  
  modpath <- unlist(str_split(d.name, pattern = "/"))
  modname <- modpath[length(modpath)]
  
  cat(modname, "\n")
  if(file.exists(paste0(d.name, "/ln_density-predicted.png"))){
    cat(paste0("![](",d.name, "/ln_density-predicted.png)"))
  }
  cat("\n\n")
  
}

herringlarvae_fall_500_biascorrect

herringlarvae_spring_500_biascorrect

Compare with fall small copepod models

Spring large copepod models

The Day of Year covariate model did converge in spring for large copepods.

fallonly <- splitoutput |> dplyr::filter(Season=="spring")

plot_indices_comp(splitoutput = fallonly, 
             plotdata = "lgcopeALL", 
             plotregions = c("her_sp", "her_fa", "MAB", "GB", "GOM", "SS", "AllEPU"), 
             plottitle = "Large copepods (All)",
             comptype = "trend")

fallonly <- splitoutput |> dplyr::filter(Season=="spring")

plot_indices_comp(splitoutput = fallonly, 
             plotdata = "lgcopeALL", 
             plotregions = c("her_sp", "her_fa", "MAB", "GB", "GOM", "SS", "AllEPU"), 
             plottitle = "Large copepods (All)",
             comptype = "scale")